github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/interactive/examples/Interactive Beam Example.ipynb (about)

     1  {
     2   "cells": [
     3    {
     4     "cell_type": "markdown",
     5     "metadata": {},
     6     "source": [
     7      "<!--\n",
     8      "    Licensed to the Apache Software Foundation (ASF) under one\n",
     9      "    or more contributor license agreements.  See the NOTICE file\n",
    10      "    distributed with this work for additional information\n",
    11      "    regarding copyright ownership.  The ASF licenses this file\n",
    12      "    to you under the Apache License, Version 2.0 (the\n",
    13      "    \"License\"); you may not use this file except in compliance\n",
    14      "    with the License.  You may obtain a copy of the License at\n",
    15      "\n",
    16      "      http://www.apache.org/licenses/LICENSE-2.0\n",
    17      "\n",
    18      "    Unless required by applicable law or agreed to in writing,\n",
    19      "    software distributed under the License is distributed on an\n",
    20      "    \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
    21      "    KIND, either express or implied.  See the License for the\n",
    22      "    specific language governing permissions and limitations\n",
    23      "    under the License.\n",
    24      "-->\n",
    25      "\n",
    26      "# Interactive Beam Examples"
    27     ]
    28    },
    29    {
    30     "cell_type": "code",
    31     "execution_count": null,
    32     "metadata": {},
    33     "outputs": [],
    34     "source": [
    35      "import apache_beam as beam\n",
    36      "from apache_beam.runners.interactive import interactive_runner\n",
    37      "from apache_beam.runners.interactive.interactive_beam import *"
    38     ]
    39    },
    40    {
    41     "cell_type": "code",
    42     "execution_count": null,
    43     "metadata": {},
    44     "outputs": [],
    45     "source": [
    46      "p = beam.Pipeline(runner=interactive_runner.InteractiveRunner())  # every later cell builds on this interactive-runner pipeline"
    47     ]
    48    },
    49    {
    50     "cell_type": "code",
    51     "execution_count": null,
    52     "metadata": {},
    53     "outputs": [],
    54     "source": [
    55      "init_pcoll = p | beam.Create(range(10))  # source PCollection: the integers 0 through 9\n",
    56      "show(init_pcoll)  # show the source values (`show` arrives via the interactive_beam star import)"
    57     ]
    58    },
    59    {
    60     "cell_type": "code",
    61     "execution_count": null,
    62     "metadata": {},
    63     "outputs": [],
    64     "source": [
    65      "squares = init_pcoll | 'Square' >> beam.Map(lambda n: n * n)  # element-wise n -> n^2\n",
    66      "show(squares)  # show the squared values (`show` arrives via the interactive_beam star import)"
    67     ]
    68    },
    69    {
    70     "cell_type": "code",
    71     "execution_count": null,
    72     "metadata": {},
    73     "outputs": [],
    74     "source": [
    75      "cubes = init_pcoll | 'Cube' >> beam.Map(lambda n: n ** 3)  # element-wise n -> n^3\n",
    76      "show(cubes)  # show the cubed values (`show` arrives via the interactive_beam star import)"
    77     ]
    78    },
    79    {
    80     "cell_type": "code",
    81     "execution_count": null,
    82     "metadata": {},
    83     "outputs": [],
    84     "source": [
    85      "# %pip (not !pip) installs into the environment of the running kernel.\n",
    86      "%pip install matplotlib\n",
    87      "%matplotlib inline\n",
    88      "from matplotlib import pyplot as plt\n",
    89      "\n",
    90      "# Run the pipeline and pull both results back into local lists; each list is\n",
    91      "# sorted so its i-th value lines up with the i-th input in init_list.\n",
    92      "result = p.run()\n",
    93      "init_list = list(range(10))\n",
    94      "squares_list = sorted(result.get(squares))\n",
    95      "cubes_list = sorted(result.get(cubes))\n",
    96      "\n",
    97      "plt.scatter(init_list, squares_list, label='squares', color='red')\n",
    98      "plt.scatter(init_list, cubes_list, label='cubes', color='blue')\n",
    99      "plt.legend(loc='upper left')\n",
   100      "plt.show()"
   101     ]
   102    },
   103    {
   104     "cell_type": "code",
   105     "execution_count": null,
   106     "metadata": {},
   107     "outputs": [],
   108     "source": [
   109      "class AverageFn(beam.CombineFn):  # arithmetic mean, carried as a (running total, element count) pair\n",
   110      "  def create_accumulator(self):  # empty state: float total 0.0, count 0\n",
   111      "    return 0.0, 0\n",
   112      "\n",
   113      "  def add_input(self, sum_count, input):  # fold one element into the pair\n",
   114      "    total, seen = sum_count\n",
   115      "    return (total + input, seen + 1)\n",
   116      "\n",
   117      "  def merge_accumulators(self, accumulators):  # add partial pairs component-wise\n",
   118      "    totals, tallies = zip(*accumulators)\n",
   119      "    return (sum(totals), sum(tallies))\n",
   120      "\n",
   121      "  def extract_output(self, sum_count):  # total / count, or NaN when nothing was counted\n",
   122      "    total, seen = sum_count\n",
   123      "    return float('NaN') if not seen else total / seen"
   124     ]
   125    },
   126    {
   127     "cell_type": "code",
   128     "execution_count": null,
   129     "metadata": {
   130      "scrolled": true
   131     },
   132     "outputs": [],
   133     "source": [
   134      "average_square = squares | 'Average Square' >> beam.CombineGlobally(AverageFn())  # AverageFn applied across all of squares\n",
   135      "average_cube = cubes | 'Average Cube' >> beam.CombineGlobally(AverageFn())  # AverageFn applied across all of cubes"
   136     ]
   137    },
   138    {
   139     "cell_type": "code",
   140     "execution_count": null,
   141     "metadata": {},
   142     "outputs": [],
   143     "source": [
   144      "show(average_square)  # show the AverageFn result for the squares (`show` arrives via the interactive_beam star import)"
   145     ]
   146    },
   147    {
   148     "cell_type": "code",
   149     "execution_count": null,
   150     "metadata": {},
   151     "outputs": [],
   152     "source": [
   153      "show(average_cube)  # show the AverageFn result for the cubes (`show` arrives via the interactive_beam star import)"
   154     ]
   155    },
   156    {
   157     "cell_type": "code",
   158     "execution_count": null,
   159     "metadata": {},
   160     "outputs": [],
   161     "source": [
   162      "p.run()  # run the whole pipeline `p`, which was constructed with an InteractiveRunner"
   163     ]
   164    }
   165   ],
   166   "metadata": {
   167    "language_info": {
   168     "codemirror_mode": {
   169      "name": "ipython",
   170      "version": 3
   171     },
   172     "file_extension": ".py",
   173     "mimetype": "text/x-python",
   174     "name": "python",
   175     "nbconvert_exporter": "python",
   176     "pygments_lexer": "ipython3",
   177     "version": "3.7.4"
   178    }
   179   },
   180   "nbformat": 4,
   181   "nbformat_minor": 4
   182  }